source("analysis_helpers.R")
# Load the tracking data for the app session(s) given by code "f33ac92fb8",
# derive the per-tracking-session times from it and plot the session durations.
tracking_data <- "f33ac92fb8" |>
  load_app_sessions_tracking_data()
track_sess_times <- tracking_data |>
  tracking_sess_times()
track_sess_times |>
  plot_tracking_sess_durations()
# Time window of interest around the exercise.
starttime <- ymd_hm("2023-11-15 13:15", tz = "CET") # exercise start - 30 min.
endtime <- ymd_hm("2023-11-15 17:30", tz = "CET") # exercise end + 30 min.

# Keep only the events inside the window, then clip every tracking session's
# bounds to the events that remain for it:
#   - the start may only move forward (max) to the first remaining event,
#   - the end may only move backward (min) to the last remaining event.
# Using min() for the start would never tighten it, so a session opened before
# the window would keep its old start and report an inflated duration.
tracking_data <- tracking_data |>
  filter(event_time >= starttime, event_time <= endtime) |>
  group_by(track_sess_id) |>
  mutate(
    track_sess_start = max(track_sess_start, min(event_time)),
    track_sess_end = min(track_sess_end, max(event_time))
  ) |>
  ungroup()

# Recompute the session times on the restricted data and plot the durations
# (overall, per user code, and as a histogram).
track_sess_times <- tracking_sess_times(tracking_data)
plot_tracking_sess_durations(track_sess_times)
plot_tracking_sess_durations(track_sess_times, by_user_code = TRUE)
plot_tracking_sess_durations_hist(track_sess_times, binwidth = 0.25)
# Median tracking-session duration, converted to minutes.
as.double(median(track_sess_times$duration), units = "mins")
## [1] 41.37412
# Dimensions (rows, columns) of the tracking data.
tracking_data |> dim()
## [1] 265860 40
# Number of distinct user app session codes.
tracking_data$user_app_sess_code |> unique() |> length()
## [1] 15
# Number of distinct tracking sessions.
tracking_data$track_sess_id |> unique() |> length()
## [1] 15
# Number of tracking sessions per form factor: reduce the data to the unique
# (tracking session, form factor) pairs, then count the pairs per form factor.
tracking_data |>
  distinct(track_sess_id, form_factor) |>
  count(form_factor)
## # A tibble: 2 × 2
## form_factor n
## <fct> <int>
## 1 desktop 14
## 2 tablet 1
# Event counts per event type. The helper's result carries both the plot and
# the underlying table, so it is computed once and both parts are shown from
# the same result instead of re-running the helper for each of them.
res <- plot_event_type_counts(tracking_data)
res$plot
res$table
## # A tibble: 10 × 2
## type n
## <fct> <int>
## 1 chapter 56
## 2 click 1734
## 3 contentscroll 89059
## 4 ex_result 48
## 5 ex_submit 48
## 6 input 946
## 7 input_change 65
## 8 mouse 173163
## 9 question_submit 173
## 10 scroll 568
# Event counts per event type and user: show the plot first.
res <- tracking_data |> plot_event_type_counts_per_user()
res$plot

# Keep only the three submission-related event types and spread them into one
# column per type (one row per user); combinations without events become 0.
submit_event_counts <- res$table |>
  filter(type %in% c("ex_result", "ex_submit", "question_submit")) |>
  pivot_wider(names_from = type, values_from = n, values_fill = 0)
submit_event_counts
## # A tibble: 15 × 4
## user_code question_submit ex_result ex_submit
## <fct> <int> <int> <int>
## 1 0383a657ecfa9596 14 0 0
## 2 1194a5f2318f9118 8 0 0
## 3 122a85ef1e6f8f54 11 0 0
## 4 46b1f0eca5d6c0c2 23 9 9
## 5 521ec99409c16a5c 12 0 0
## 6 52a9b29ca5d8527c 9 0 0
## 7 5ade00fb32f716df 12 10 10
## 8 646e65ff535d24e1 17 0 0
## 9 a4be190adbd9dc41 10 0 0
## 10 c439271a1c7cbb67 9 10 10
## 11 cf98f5634c340955 11 0 0
## 12 eaab3f0249108fff 11 5 5
## 13 ef0e35f9ae0e35a4 9 0 0
## 14 f077ce68e979ee1f 8 12 12
## 15 f78ba1efd64104b4 9 2 2
# Users lacking one kind of submission entirely: either no question
# submissions or no exercise events (exercise results + submits combined).
submit_event_counts |>
  mutate(ex = ex_result + ex_submit) |>
  select(-ex_result, -ex_submit) |>
  filter(question_submit == 0 | ex == 0)
## # A tibble: 9 × 3
## user_code question_submit ex
## <fct> <int> <int>
## 1 0383a657ecfa9596 14 0
## 2 1194a5f2318f9118 8 0
## 3 122a85ef1e6f8f54 11 0
## 4 521ec99409c16a5c 12 0
## 5 52a9b29ca5d8527c 9 0
## 6 646e65ff535d24e1 17 0
## 7 a4be190adbd9dc41 10 0
## 8 cf98f5634c340955 11 0
## 9 ef0e35f9ae0e35a4 9 0
# Extract the question submissions and show the proportion of correct answers
# per question, first as a plot and then as a table.
quest_data <- tracking_data |> question_submit_data()
res <- quest_data |> plot_question_prop_correct()
res$plot
res$table
## # A tibble: 7 × 4
## ex_label n prop_correct sd_correct
## <fct> <int> <dbl> <dbl>
## 1 calc_sens 28 0.536 0.0942
## 2 calc_spec 29 0.517 0.0928
## 3 clinicalstudy1 17 0.882 0.0781
## 4 clinicalstudy2 23 0.652 0.0993
## 5 confmat 44 0.341 0.0715
## 6 P_T_K 18 0.833 0.0878
## 7 ppv_sens 14 0.357 0.128
# Tries per quiz question: plot them, then check whether `ex_correct` is TRUE
# in every row of the tries table.
quiz_tries <- quest_data |> question_or_exercise_submit_tries()
plot_question_n_tries(quiz_tries)$plot
all(quiz_tries$ex_correct)
## [1] TRUE
# Proportion of correct quiz answers by try number.
quiz_prop_correct_per_try <- quest_data |> prop_correct_in_ith_try()
quiz_prop_correct_per_try |>
  plot_prop_correct_per_try(
    "Proportion of correct quiz answers per number of tries"
  )
# Extract the exercise results, derive the tries table from them and summarise
# per exercise label: number of rows and mean of `ex_correct`.
ex_data <- tracking_data |> exercise_result_data()
ex_tries <- ex_data |> question_or_exercise_submit_tries()
ex_tries |>
  group_by(ex_label) |>
  summarise(
    n = n(),
    prop_correct = mean(ex_correct)
  )
## # A tibble: 2 × 3
## ex_label n prop_correct
## <fct> <int> <dbl>
## 1 neg_pred_value 6 0.167
## 2 nvw_a 1 1
# Plot the tries per exercise and report the overall mean of `ex_correct`.
ex_tries |> plot_exercise_n_tries()
ex_tries$ex_correct |> mean()
## [1] 0.2857143
# Proportion of correct code submissions by try number.
ex_prop_correct_per_try <- ex_data |> prop_correct_in_ith_try()
ex_prop_correct_per_try |>
  plot_prop_correct_per_try(
    "Proportion of correct code submissions per number of tries"
  )
# (On try 6, someone in tracking session #185 made a correct submission and
# then regressed to a wrong submission afterwards.)
# Often, the submitted code is not correct R code.
# Furthermore, students don’t understand that the last output is the result
# but assume that they have to use print(...) to return the result.
# Print the submitted values of all incorrect exercise results, with
# surrounding whitespace trimmed and a "---" line between submissions.
ex_data |>
  filter(!ex_correct) |>
  pull(value) |>
  trimws() |>
  paste(collapse = "\n---\n") |>
  cat()
## prevalence = 0,02
## sens = 0,914
## spec = 0,994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.02
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.02
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(1-positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(1-positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
##
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## ---
## print(prevalence)
## ---
## # P(K-|T-)
##
## t <- 1
## (spec ) / sens
## ---
## # P(K-|T-)
##
## t <- 1
## (spec ) / sens
## ---
## # P(K-|T-)
##
## (spec * (1 - prevalence)) / (1-(0.914 * 0.002 + (1−0.994) * (1−0.002)))
## ---
## # P(K-|T-)
##
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## # P(K-|T-)
## 1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002))
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## # P(K-|T-)
## 0.994 * 0.998 / 0.992
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## 0.994 * 0.998
## ---
## # P(K-|T-)
## (0.994 * 0.998)
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## nvw_param <- expand.grid(praev = c(0,1, 0,2, 0,3, 0,4, 0,5, 0,6, 0,7, 0,8, 0,9),
## sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = c(0,1, 0,2, 0,3, 0,4, 0,5, 0,6, 0,7, 0,8, 0,9),
## sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0,1, 0,9, length.out = 9),
## sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0,1, 0,9, length.out = 9),
## sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0,1, 0.9, length.out = 9),
## sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0.1, 0.9, length.out = 9),
## sens = c(0.8, 0.91, 0.99))
## nvw_param
## ---
## print(prevalence)
## ---
## print(sens)
## ---
## *sens
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevelance))
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevelence))
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevalence))
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevalence))
## ---
## prevalence = 0.998
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevalence))
## ---
## sens
## ---
## sens
## spec
## ---
## sens
## spec
## prevalence
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nKnT
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nk <- 1 - spec
## nk
##
## prevalence / (prevalence*nk)
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nk <- 1 - spec
## nk
##
## prevalence / ((prevalence*nk)+(ntk*spec) )
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nk <- 1 - spec
## nk
##
## prevalence / ((prevalence*1-spec)+(1-sens*spec) )
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nk <- 1 - spec
## nk
##
## prevalence / (((1-prevalence)*(1-spec))+(1-sens*spec) )
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nk <- 1 - spec
## nk
##
## prevalence / (((1-prevalence)*(1-spec))+(1-sens*spec) )
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nk <- 1 - spec
## nk
##
## (1-prevalence)*(1-spec) / ((1-prevalence)*(1-spec))+(1-sens*spec)
## ---
## sens
## spec
## prevalence
##
## ntk <- 1 - sens
## ntk
##
## nk <- 1 - spec
## nk
##
## (1-prevalence)*(1-spec) / ((1-prevalence)*(1-spec))+(1-sens*spec)
## ---
## (0.994*0.998)/((0.994*0.998)+(1-0.914)*0.002)
# For every tracking session: fetch its mouse tracks, compute the track
# features (tagged with the session id and form factor), collect them in
# `tracks_features` and plot the session's mouse tracks.
tracks_features <- list()
for (track_sess_id in sort(unique(tracking_data$track_sess_id))) {
  # The helper's result carries both the tracks and the session's form factor.
  sess_mouse_data <- mouse_tracks_for_tracking_sess(tracking_data, track_sess_id)

  sess_features <- mouse_tracks_features(sess_mouse_data$tracks)
  sess_features$track_sess_id <- track_sess_id
  sess_features$form_factor <- sess_mouse_data$form_factor
  tracks_features[[track_sess_id]] <- sess_features

  #print(c(track_sess_id, min(mouse_tracks_data$mouse_y), max(mouse_tracks_data$mouse_y)))
  # Inside a loop nothing is auto-printed, hence the explicit print().
  print(
    plot_mouse_tracks_for_tracking_session(
      sess_mouse_data$tracks,
      track_sess_id,
      sess_mouse_data$form_factor
    )
  )
}
# Stack the per-session feature tables into one table, ordered by session and
# time step.
tracks_features_per_track_sess <- bind_rows(tracks_features) |>
  arrange(track_sess_id, t_step)

# Build the full (session x time step) grid so that every session has a row
# for every time step up to the overall maximum. Steps without observed
# features get a mean velocity of 0.
tracks_features_per_track_sess_filled <-
  expand.grid(
    track_sess_id = sort(unique(tracks_features_per_track_sess$track_sess_id)),
    t_step = seq_len(max(tracks_features_per_track_sess$t_step))
  ) |>
  left_join(tracks_features_per_track_sess, by = c("track_sess_id", "t_step")) |>
  arrange(track_sess_id, t_step) |>
  mutate(mean_t_step_V = ifelse(is.na(mean_t_step_V), 0, mean_t_step_V)) |>
  # Fill the form factor *within* each session and in both directions: an
  # ungrouped downward fill would hand the previous session's form factor to
  # the leading empty steps of the next session, and would leave leading empty
  # steps of the first session as NA.
  group_by(track_sess_id) |>
  fill(form_factor, .direction = "downup") |>
  ungroup()

# Velocity heatmap over all time steps, then restricted to the first 10 steps.
plot_mouse_velocity_heatmap(tracks_features_per_track_sess_filled)
tracks_features_per_track_sess_filled |>
  filter(t_step <= 10) |>
  plot_mouse_velocity_heatmap()